# -*- coding: utf-8 -*-
from scrapy import Request
from zc_core.spiders.base import BaseSpider
from zc_core.model.items import Box
from baqc.rules import *
from zc_core.spiders.base import BaseSpider


class SkuSpider(BaseSpider):
    """Spider that collects catalogs, suppliers and per-supplier SKU listings.

    Crawl flow:

    1. ``start_requests`` fetches the search index page.
    2. ``parse_catalog_brand_supplier`` emits the catalog and supplier boxes
       and requests the first listing page for every supplier.
    3. ``parse_total_page`` reads the page count from that first page, parses
       the same response as page 1 and requests the remaining pages.
    4. ``parse_sku`` emits the SKUs found on a listing page.

    ``self.batch_no`` and ``self.error_back`` are not defined in this class;
    presumably they are provided by ``BaseSpider`` -- TODO confirm.
    The module-level helpers ``parse_catalog``, ``parse_supplier``,
    ``parse_total_page`` and ``parse_sku`` presumably come from the
    ``baqc.rules`` star import -- TODO confirm.
    """

    name = 'sku'
    # Search index page; used to discover catalogs and suppliers.
    index_url = 'http://183.62.155.68:8001/search.html'
    # Listing URL template. Placeholders, in order: id segment of ``combo``
    # (this spider fills it with the supplier id), page number, page size.
    sku_list_url = 'http://183.62.155.68:8001/search.html?combo=0_0_{}_0_0_0&k=%2A&page={}&per_page={}'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider.

        :param batchNo: batch identifier forwarded to the base spider.
        :param args: extra positional arguments forwarded to the base spider.
        :param kwargs: extra keyword arguments forwarded to the base spider.
        """
        # Star-args are placed before the keyword for readability; the call
        # is semantically identical to passing ``batchNo=`` first.
        super(SkuSpider, self).__init__(*args, batchNo=batchNo, **kwargs)
        # Number of items requested per listing page.
        self.page_size = 30

    def start_requests(self):
        """Yield the initial request for the search index page."""
        yield Request(
            url=self.index_url,
            meta={
                'batchNo': self.batch_no,
            },
            callback=self.parse_catalog_brand_supplier,
            errback=self.error_back,
            priority=250,
            dont_filter=True,
        )

    def _sku_list_request(self, sp_id, page, callback):
        """Build the request for one SKU listing page of one supplier.

        Example of a listing URL served by the site:
        http://183.62.155.68/search.html?combo=0_0_0_0_0_0&commit=%E6%90%9C%E7%B4%A2&k=%2A&page=2&per_page=100&utf8=%E2%9C%93

        :param sp_id: supplier id substituted into the ``combo`` segment.
        :param page: 1-based page number.
        :param callback: spider method that handles the response.
        :return: the ``Request`` for that page.
        """
        return Request(
            url=self.sku_list_url.format(sp_id, page, self.page_size),
            callback=callback,
            errback=self.error_back,
            meta={
                'reqType': 'sku',
                'batchNo': self.batch_no,
                'supplierId': sp_id,
                'page': page,
            },
            priority=200,
            dont_filter=True,
        )

    def parse_catalog_brand_supplier(self, response):
        """Emit catalog and supplier boxes, then start each supplier's listing.

        :param response: response for ``index_url``.
        :return: generator of ``Box`` items and first-page listing requests.
        """
        # Catalog list.
        cats = parse_catalog(response)
        if cats:
            self.logger.info('品类: count[%s],content:%s', len(cats), cats)
            yield Box('catalog', self.batch_no, cats)

        # Supplier list.
        suppliers = parse_supplier(response)
        if not suppliers:
            return
        self.logger.info('供应商: count[%s]', len(suppliers))
        yield Box('supplier', self.batch_no, suppliers)

        # Page 1 of every supplier goes to parse_total_page, which reads the
        # page count and fans out to the remaining pages.
        for sp in suppliers:
            yield self._sku_list_request(sp.get('id'), 1, self.parse_total_page)

    def parse_total_page(self, response):
        """Read the page count from a supplier's first listing page.

        The response being handled already is page 1, so its SKUs are parsed
        directly instead of downloading the same page a second time; only
        pages 2..total are requested.

        :param response: response for page 1 of a supplier's listing.
        :return: generator of page-1 items and requests for the other pages.
        """
        sp_id = response.meta.get('supplierId')
        # Calls the module-level helper of the same name, not this method.
        total_page = parse_total_page(response)
        self.logger.info('总页数: total=%s, size=%s', total_page, self.page_size)

        # A missing page count would otherwise raise on ``total_page + 1``;
        # a count below 1 yields nothing, as the original range() did.
        if not total_page or total_page < 1:
            self.logger.warning('no listing pages: sp=%s, total=%s', sp_id, total_page)
            return

        # Reuse the already-downloaded first page.
        for item in self.parse_sku(response):
            yield item

        for page in range(2, total_page + 1):
            yield self._sku_list_request(sp_id, page, self.parse_sku)

    def parse_sku(self, response):
        """Emit the SKUs found on one listing page.

        :param response: response for a supplier listing page; its meta
            carries ``supplierId`` and ``page``.
        :return: generator yielding at most one ``Box`` of SKUs.
        """
        meta = response.meta
        cur_page = meta.get('page', 1)
        sp_id = meta.get('supplierId')
        # Calls the module-level helper of the same name, not this method.
        sku_list = parse_sku(response)
        if sku_list:
            self.logger.info('清单: sp=%s, page=%s, cnt=%s', sp_id, cur_page, len(sku_list))
            yield Box('sku', self.batch_no, sku_list)
        else:
            self.logger.info('分页为空: sp=%s, page=%s', sp_id, cur_page)
